library(readr)
library(tibble)
library(tidyr)
# Console output from library(tidyr):
#   Attaching package: ‘tidyr’
#   The following object is masked from ‘package:Matrix’:
#     expand
library(dplyr)
# Console output from library(dplyr):
#   Attaching package: ‘dplyr’
#   The following objects are masked from ‘package:stats’:
#     filter, lag
#   The following objects are masked from ‘package:base’:
#     intersect, setdiff, setequal, union
library(tidytext)
library(ggplot2)
library(tm)
# Console output from library(tm):
#   Loading required package: NLP
#   Attaching package: ‘NLP’
#   The following object is masked from ‘package:ggplot2’:
#     annotate
library(topicmodels)
library(stringr)
# Read the weeknotes export. `col_names = FALSE` tells readr that the first row
# is data, not a header, so the columns come back auto-named (X1, X2, ...).
#
# The column types are declared explicitly instead of being guessed: the
# original run guessed every one of the ten columns (X1..X10) as character, and
# stating that up front both pins the types and stops readr from printing its
# "Parsed with column specification" message.
#
# read_csv() already returns a tibble, so no as_tibble() wrapper is needed.
weeknotes_ryan <- read_csv(
  "weeknotes_ryan.csv",
  col_names = FALSE,
  col_types = cols(.default = col_character())
)
# Label each column with its episode id. The labels are applied positionally,
# so the order below must match the column order of the CSV (first column is
# s02e01, last column is s01e01).
weeknotes_ryan <- setNames(
  weeknotes_ryan,
  c(
    "s02e01",
    "s01e09",
    "s01e08",
    "s01e07",
    "s01e06",
    "s01e05",
    "s01e04",
    "s01e03",
    "s01e02",
    "s01e01"
  )
)
# Reshape from wide (one column per episode) to long: one row per original
# cell, with the source column name in `episode` and the cell value in `text`.
# NOTE(review): gather() is superseded by pivot_longer() in newer tidyr; it is
# kept here because switching would change the row order of the result.
tidy_notes <- gather(weeknotes_ryan, key = "episode", value = "text", 1:10)

# Display the long-format table.
tidy_notes
# Copy `text` into `text2` with every literal period replaced by a space.
# Presumably this is so that words joined by a period are split into separate
# tokens below -- confirm against the source notes.
tidier_notes <- mutate(tidy_notes, text2 = gsub("[.]", " ", text))

# Tokenise `text2` into one row per token, stored in a new `word` column.
tidy_tokens <- unnest_tokens(tidier_notes, word, text2)

# Display the tokenised table.
tidy_tokens